Homework 4 , Spring 2022
Name: Anitha Ganapathy
Email: aganapa@iu.edu
Organizing the imports
# Import the necessary libraries
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import matplotlib.mlab as mlab
import librosa
import soundfile as sf
from IPython.display import Audio
import cmath, math
import scipy.io # to read .mat files
import seaborn as sns
from numpy.core.fromnumeric import size
import scipy
from google.colab import drive
drive.mount('/content/drive')
dirpath = '/content/drive/MyDrive/MLSP_data/'
!ls $dirpath
trs_file = dirpath + 'trs.wav'
trn_file = dirpath + 'trn.wav'
trs, sr = librosa.load(trs_file , sr = None)
trn, sr = librosa.load(trn_file , sr = None)
trs.shape, trn.shape
Audio(trs, rate = sr)
Audio(trn, rate = sr)
hop_length = 512
S = librosa.stft(trs, n_fft=1024, hop_length = hop_length, window='hann')
N = librosa.stft(trn, n_fft=1024, hop_length = hop_length, window='hann')
S.shape , N.shape
S_abs = np.abs(S)
N_abs = np.abs(N)
G = S_abs + N_abs
G.shape
Audio(librosa.istft(G), rate = sr)
B
$$ \boldsymbol{B}_{f, t}= \begin{cases}1 & \text { if } \boldsymbol{S}_{f, t} \geq \boldsymbol{N}_{f, t} \\ 0 & \text { otherwise }\end{cases} $$The hope is that
S ≈ B ⊙ G
B = (S_abs > N_abs).astype(int)
B.shape
B
x_nmf_file = dirpath + 'x_nmf.wav'
x_nmf, sr = librosa.load(x_nmf_file , sr = None)
x_nmf.shape
Audio(x_nmf, rate = sr)
X = librosa.stft(x_nmf, n_fft=1024, hop_length = hop_length, window='hann')
X.shape
plt.specgram(x_nmf ,Fs = sr)
plt.suptitle("x_nmf wav, signal in frequency domain using the spectrogram ", y=1.05, fontsize=18)
plt.xlabel('Time')
plt.ylabel('Frequency')
plt.show()
Y = np.abs(X)
Y.shape
G.shape
def knn_source_separation(Y, G, k,B, X):
print("Y shape :", Y.shape) # Y shape : (513, 131)
print("G shape :", G.shape) # G shape : (513, 987)
euc_dist = np.zeros((Y.shape[1], G.shape[1]))
# we compare each column of Y with all columns in G to find out knearest neighbors.
for i in range(Y.shape[1]): # (columns : 131)
for j in range(G.shape[1]): # (columns : 989)
euc_dist[i][j] = np.sqrt(np.sum((Y[:, i] - G[:, j]) ** 2))
sorted_distances = euc_dist.argsort()
knn_indexes = sorted_distances[:,0:k] # k indexes with shortest distance
print("knn_indexes shape : ", knn_indexes.shape) # knn_indexes shape : (131, k)
# print(knn_indexes)
# D = IBM of the test signal
D = np.zeros((Y.shape[1],Y.shape[0])) # Y shape : (513, 131)
# B shape : (513, 989)
# By using the nearest neighbor indices, I can collect the IBM vectors
# from B, i.e. {B:,i1 ,B:,i2 , · · · ,B:,ik}.
for m in range(knn_indexes.shape[0]): # 131 for every row
b_val = np.zeros((513,1))
for n in range(knn_indexes.shape[1]): # 20 for every column index
# b_val.append(np.median(np.sort(B[:, knn_indexes[m][n]])))
col_index = int(knn_indexes[m][n])
median_value = B[:,col_index]
median_value = median_value.reshape((median_value.shape[0],1))
b_val = np.hstack((b_val,median_value))
median_k = np.median(b_val[:,1:], axis = 1)
D[m,:] = median_k
print(f'D.T shape : {D.T.shape}')
print(f'X shape : {X.shape}')
S_recov = np.multiply(D.T,X)
print(f'S_recov shape : {S_recov.shape}')
return S_recov
source_recov = knn_source_separation(Y, G, 12,B, X)
source_actual = librosa.istft(source_recov, hop_length = hop_length)
Audio(source_actual, rate = sr)
!ls $dirpath
eeg_mat = scipy.io.loadmat(dirpath+'eeg.mat')
print(eeg_mat.keys())
# print(mat.get('L'))
train_x = eeg_mat.get('x_train')
train_y = eeg_mat.get('y_train')
test_x = eeg_mat.get('x_te')
test_y = eeg_mat.get('y_te')
train_x.shape, train_y.shape, test_x.shape, test_y.shape
print(train_x[:,0,:].reshape(-1).shape)
train_x[:, 0, :].shape
def blackman_window(data, N, hop_length):
blackman = np.blackman(N)
new_data = []
hop_counter = 0
while hop_counter + N <= len(data):
frame = data[hop_counter: hop_counter+N]
vector = np.multiply(frame.T, blackman)
new_data.append(vector)
hop_counter = hop_counter + hop_length
return np.array(new_data).T
def stft_for_mu_freq(data):
stft_mu_vector = []
dft_eeg_sig = np.fft.fft(np.eye(64))
for i in range(3):
channel_window = blackman_window(data[:,i], 64, 48)
stft_mu = np.abs(np.dot(dft_eeg_sig, channel_window))[2:7,:].reshape(-1,1)
stft_mu_vector.append(stft_mu)
return np.array(stft_mu_vector).reshape(-1,1)
Creating the Z matrix
Z_train = []
Z_test = []
for i in range(train_x.shape[2]):
Z_train.append(stft_for_mu_freq(train_x[:,:,i]))
for i in range(test_x.shape[2]):
Z_test.append(stft_for_mu_freq(test_x[:,:,i]))
print(len(Z_train), len(Z_test))
print(len(Z_train[0]), len(Z_test[0]))
Z_train = np.array(Z_train).T.reshape(225,112)
Z_test = np.array(Z_test).T.reshape(225,28)
print(Z_train.shape, Z_test.shape)
print(Z_train.shape, Z_test.shape)
def matrix_A_calc(l,M):
np.random.seed(3232)
A = np.random.uniform(-1,1,l)
for i in range(len(M) - 1):
A = np.vstack((A,np.random.uniform(-1,1,l)))
sum_A = [sum(A[i]) for i in range(len(M))]
sum_A = np.array(sum_A)
inv_sum_A = 1/sum_A
B = A * inv_sum_A[:, np.newaxis]
return B.T
def matrix_Y_calc(A,Z):
Y = np.dot(A,Z)
Y_sign = np.sign(Y)
return Y,Y_sign
def hamming_dist(s1,s2):
return np.count_nonzero(s1 != s2)
def distance(Y,Y_test):
distance = np.zeros((28,112))
for i in range(distance.shape[0]):
for j in range(distance.shape[1]):
distance[i][j] = hamming_dist(Y_test[:,i],Y[:,j])
sorted_distance = distance.argsort()
final_index = np.zeros((Y_test.shape[1],Y.shape[1]))
for i in range(Y_test.shape[1]):
for j in range(Y.shape[1]):
index = sorted_distance[i][j]
final_index[i][j] = train_y[index,0]
return final_index
l_list =[]
acc_list = []
k = 12
for i in range(2,21,2):
l = i
A = matrix_A_calc(l,Z_train)
Y,Y_sign = matrix_Y_calc(A,Z_train)
Y_test,Y_test_sign = matrix_Y_calc(A,Z_test)
knn_dist_sorted = distance(Y_sign,Y_test_sign)
kNearestNeighbours = knn_dist_sorted[:,0:k]
final_y_test = np.zeros((test_y.shape[0],1))
for p in range(0,28):
final_y_test[p] = np.median(kNearestNeighbours[p,:])
count = 0
for p in range(0,28):
if final_y_test[p] == test_y[p]:
count+=1
accuracy = count/28
l_list.append(i)
acc_list.append(accuracy)
df = pd.DataFrame(list(zip(l_list, acc_list)), columns= ['LSH', 'Acc'])
df
# plot the bar graph
fig = plt.figure(figsize=(8,5))
ax = fig.add_axes([0,0,1,1])
plt.bar(l_list,acc_list, color ='lightblue',width = 0.9)
plt.plot(l_list,acc_list, color ='green')
plt.title("Bar plot of lsh vs the accuracy", fontsize = 20)
plt.suptitle("k value is : " +str(k), fontsize = 15)
plt.xticks(l_list)
plt.xlabel("LSH values", fontsize = 18)
plt.ylabel("accuracy", fontsize = 18)
plt.show()
!ls $dirpath
mat = scipy.io.loadmat(dirpath+'MDS_pdist.mat')
print(mat.keys())
print(mat.get('L'))
mds = np.array(mat.get('L'))
print("MDS Data shape: ",mds.shape)
mds[:5, :5]
The recipe for MDS
mds.shape
np.max(mds) , np.min(mds)
plt.figure(figsize=(5,5))
c= plt.pcolormesh(mds[:15, :15])
plt.show()
plt.figure(figsize=(15,8))
c= plt.pcolormesh(mds[:, :])
plt.colorbar(c)
plt.title('color mesh of MDS data', fontweight ="bold")
plt.show()
np.mean(mds, axis=1).shape # row wise mean
m_bar = np.mean(mds, axis=1)
m_bar.shape
m_tilde = mds - m_bar
m_tilde.shape
X_mat = m_tilde - np.mean(m_tilde, axis=0) # substraction of row wise mean
X_mat.shape
def eigen_decomposition(X, num_eigenvectors):
max_epochs = 50
eigen_val = []
eigen_vec = []
for i in range(num_eigenvectors):
w = np.random.rand(X.shape[0], 1)
for epoch in range(max_epochs):
w = np.dot(X, w)
w = w * (1/np.sqrt(np.sum(w ** 2)))
s1 = np.sqrt(np.sum(np.dot(w.T, X) ** 2))
u1 = np.dot(X.T, w) /s1
X = X - (s1 * np.dot(w, u1.T))
eigen_val.append(s1)
eigen_vec.append(w.reshape(-1))
eigen_vec = np.array(eigen_vec)
return eigen_vec.T, eigen_val
eigenvectors, eigenvalues = eigen_decomposition(X_mat, 2)
print(eigenvectors)
print(eigenvalues)
np.diag(eigenvalues)
matrix = np.dot(eigenvectors, np.sqrt(np.diag(eigenvalues)))
print(matrix.shape)
matrix
plt.figure(figsize=(20, 4))
# c= plt.pcolormesh(mds[:15, :15])
c= plt.pcolormesh(matrix.T)
plt.colorbar(c)
plt.title('color mesh of MDS data', fontweight ="bold")
plt.show()
plt.scatter(matrix[:,0], matrix[:,1], )
plt.title('MDS map', fontsize =20)
plt.show()
plt.scatter(matrix[:,1], matrix[:,0], )
plt.title('Rotated map', fontsize =20)
plt.show()
# %%shell
# jupyter nbconvert --to html /content/AG_MLSP_SP22_HW_4.ipynb